# Thesis Files/MainQLearningXGB_OptStratGeneratorThesis.R

library(ReinforcementLearningwithR)
library(parallel) # detectCores() is used below for model.par$nthread
require(compiler)
library(tictoc)

# Basic pool
######

strat <- c("Mind", "nice.tit.for.tat", "probably.nice.Base", "Rainbow.Unicorns.one", "seda.strat2", "ta.daaa", "tft.forgive.slow", "the.undertaker.1", "TikTak1")
antistrat <- get.antistrat()[strat]

# Reference pool of standard strategies
######
#strat <- c("always.coop","always.defect","random.action","grim.trigger","tit.for.tat","generous.tit.for.tat", "contrite.tit.for.tat", "tft.2forgive","pavlov","net.nice0")
#antistrat <- rep("none",length(strat))

# Manual strategies
######
#strat <- c("contrite.tit.for.tat")
#antistrat <- rep("counter.contrite.tit.for.tat", length(strat))

set.seed(123456)
if(length(strat)==1){
  file.name <- paste0("opt.run.",strat,".",Sys.Date(),".XGB")
} else {
  file.name <- paste0("opt.run.all.",Sys.Date(),".XGB")
}
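# Resulting names look like "opt.run.<strategy>.<YYYY-MM-DD>.XGB" (single strategy)
# or "opt.run.all.<YYYY-MM-DD>.XGB" (full pool); Sys.Date() supplies the date.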

continue <- FALSE

a.MC <- c(0.25) # learning rate of the Monte Carlo part (used for algo.par$hybrid.Q.a.MC)
a.Q <- c(0.25) # learning rate of the Q-learning part (used for algo.par$a)
hybrid.decay <- c(0.9) # decay of the hybrid Q/MC weighting (used for algo.par$hybrid.decay)
block.no <- 150 # number of training blocks to play
eval.no <- 1000 # number of evaluation matches at the end
rounds.no <- 60 # number of periods per game

generate.best.strat <- function(strat, antistrat, a.MC, a.Q, hybrid.decay, block.no, eval.no, rounds.no, continue=FALSE, file.name){
  restore.point("generate.best.strat")
  for(k in 1:length(strat)){
    my.strat <- strat[k]
    my.antistrat <- antistrat[k]

    encoding.state <- c("Main.real")
    eval.strat <- Model.strat.Main.real.Exp.Path

    game.object <- Get.Game.Object.PD(encoding.state = encoding.state, eval.strategy = eval.strat, encoding.params=NULL)

    game.object$game.pars$T <- rounds.no
    game.object$game.pars$T.max <- rounds.no
    game.object$game.pars$delta <- 0.95
    game.object$game.pars$other.strategies <- c(get(my.strat))
    names(game.object$game.pars$other.strategies) <- my.strat
    game.object$game.pars$err.D.prob <- 0.15
    game.object$game.pars$err.C.prob <- 0.0
    assign("game.object",game.object,envir=.GlobalEnv)

    algo.par <- Get.Def.Par.QLearningPersExpPath()

    algo.par$action.policy <- "exploration.path" #May be 'exploration.path' or 'epsilon.greedy'

    algo.par$expl.path.multi.start <- 0.85
    algo.par$expl.path.multi.end <- 0.85
    algo.par$expl.path.multi.decay.type <- "linear"
    algo.par$expl.path.multi.best.db <- 100
    algo.par$expl.path.multi.best.disc <- 0.98
    algo.par$expl.path.multi.Kp.var <- 0.005 # proportional gain
    algo.par$expl.path.multi.Ki.var <- 0.00001 # integral gain
    algo.par$expl.path.multi.Kd.var <- 0.000005 # derivative gain
    algo.par$expl.path.multi.Kp.shock <- 0.005 # proportional gain
    algo.par$expl.path.multi.Ki.shock <- 0.00001 # integral gain
    algo.par$expl.path.multi.Kd.shock <- 0.000005 # derivative gain
    algo.par$expl.path.multi.Kp.surp <- 0.5 # proportional gain
    algo.par$expl.path.multi.Ki.surp <- 0.2 # integral gain
    algo.par$expl.path.multi.Kd.surp <- 0.05 # derivative gain
    algo.par$expl.path.multi.Kp.fam <- 0.5 # proportional gain
    algo.par$expl.path.multi.Ki.fam <- 0.02 # integral gain
    algo.par$expl.path.multi.Kd.fam <- 0.01 # derivative gain
    algo.par$expl.path.multi.Kp.db <- 5 # number of data points used for the proportional term
    algo.par$expl.path.multi.Ki.db <- 100 # number of data points used for the integral term
    algo.par$expl.path.multi.Kd.db <- 5 # number of data points used to calculate the derivative
    algo.par$expl.path.multi.Kp.disc <- 0.95 # discounting factor for the proportional term
    algo.par$expl.path.multi.Ki.disc <- 0.99 # discounting factor weighting points for the integral term
    algo.par$expl.path.multi.Kd.disc <- 0.95 # discounting factor for determining the derivative
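
    # For reference only: the Kp/Ki/Kd settings above are standard PID controller
    # gains. The helper below is a generic, self-contained sketch of one discrete
    # PID step (NOT the package's internal controller) illustrating how such gains
    # turn an error signal (goal minus observed exploration measure) into a
    # control adjustment.
    pid.step.sketch <- function(errors, Kp, Ki, Kd) {
      # errors: vector of past control errors, most recent last
      n <- length(errors)
      p.term <- Kp * errors[n]   # proportional: react to the current error
      i.term <- Ki * sum(errors) # integral: accumulate past errors
      d.err <- if (n > 1) errors[n] - errors[n - 1] else 0
      d.term <- Kd * d.err       # derivative: react to the change in error
      p.term + i.term + d.term
    }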

    algo.par$expl.path.multi.start.var <- 0.1
    algo.par$expl.path.multi.start.shock <- 0.1
    algo.par$expl.path.multi.start.surp <- 0
    algo.par$expl.path.multi.start.fam <- 0

    algo.par$expl.path.multi.start.frac.var <- 0.5
    algo.par$expl.path.multi.start.frac.shock <- 0.5
    algo.par$expl.path.multi.start.frac.surp <- 0
    algo.par$expl.path.multi.start.frac.fam <- 0
    algo.par$expl.path.multi.end.frac.var <- 0.5
    algo.par$expl.path.multi.end.frac.shock <- 0.5
    algo.par$expl.path.multi.end.frac.surp <- 0
    algo.par$expl.path.multi.end.frac.fam <- 0

    algo.par$gamma <- game.object$game.pars$delta
    algo.par$a <- a.Q
    algo.par$replay.intensive <- 1
    algo.par$curio.beta <- 0

    algo.par$block.curr <- 1
    algo.par$block.best <- 1
    algo.par$block.expl.var <- 0
    algo.par$block.expl.shock <- 0
    algo.par$block.expl.surp <- 0
    algo.par$block.expl.fam <- 0
    algo.par$block.expl.vs <- 0
    algo.par$block.expl.multi <- 2


    block.length <- (algo.par$block.curr + algo.par$block.best +
                       algo.par$block.expl.var + algo.par$block.expl.shock +
                       algo.par$block.expl.surp + algo.par$block.expl.fam +
                       algo.par$block.expl.vs + algo.par$block.expl.multi) *
      game.object$game.pars$T
    algo.par$force.last <- block.length*1
    algo.par$batch.size <- block.length*50
    algo.par$max.mem <- block.length*100
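    # With the settings above (rounds.no = 60, block weights 1+1+0+0+0+0+0+2 = 4):
    # block.length = 4*60 = 240, force.last = 240, batch.size = 12000, max.mem = 24000.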


    blocks <- block.no # number of training blocks (other configurations used 1000 or 250)
    algo.par$hybrid.Q.a.MC <- a.MC
    algo.par$hybrid.Q.apply <- "always" #also used for MC
    algo.par$hybrid.decay <- hybrid.decay
    algo.par$only.experienced <- TRUE
    algo.par$use.rnn <- FALSE

    algo.par$hybrid.Q <- TRUE
    algo.par$Q.Learning <- FALSE
    algo.par$MC <- FALSE
    algo.par$hybrid.switch <- TRUE

    model.par <- Get.Def.Par.XGBoost()
    model.par$nrounds <- 50
    model.par$max_depth <- 5
    model.par$eta <- 0.3
    model.par$gamma <- 0.1
    model.par$colsample <- 0.95
    model.par$subsample <- 0.9
    model.par$min_child_weight <- 1
    model.par$nthread <- detectCores()-2
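
    # For orientation only: the hyper-parameters above correspond to standard
    # xgboost tuning parameters (assuming model.par$colsample maps to
    # colsample_bytree; how Get.Def.Par.XGBoost() forwards them is internal to
    # the package). A direct xgboost call with the same values would look
    # roughly like this (commented out; demo.X and demo.y are placeholders):
    # demo.fit <- xgboost::xgboost(data = demo.X, label = demo.y,
    #                              nrounds = 50, max_depth = 5, eta = 0.3,
    #                              gamma = 0.1, colsample_bytree = 0.95,
    #                              subsample = 0.9, min_child_weight = 1,
    #                              nthread = detectCores()-2, verbose = 0)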

    evaluator <- Setup.QLearningPersExpPath(game.object, algo.par=algo.par, model.par=model.par)
    algo.var <- Initialise.QLearningPersExpPath(game.object, algo.par, memory.init="self.play", memory.param=list(no=100), model.par=model.par)

    res <- Train.QLearningPersExpPath(evaluator=evaluator, model.par=model.par, algo.par=algo.par, algo.var=algo.var, game.object = game.object, blocks=blocks, eval.only=FALSE, start.w.training = TRUE,out.file=paste0(file.name,".tmp"))

    # Copy trained memory & model state from the training result
    evaluator <- res$evaluator
    algo.var$memory <- res$algo.var$memory
    algo.var$analysis <- res$algo.var$analysis
    algo.var$epsilon <- res$algo.var$epsilon
    algo.var$path.goal.var <- res$algo.var$path.goal.var
    algo.var$path.goal.shock <- res$algo.var$path.goal.shock
    algo.var$path.goal.surp <- res$algo.var$path.goal.surp
    algo.var$path.goal.fam <- res$algo.var$path.goal.fam
    algo.var$path.goal.multi <- res$algo.var$path.goal.multi
    algo.var$expl.path.var <- res$algo.var$expl.path.var
    algo.var$expl.path.shock <- res$algo.var$expl.path.shock
    algo.var$expl.path.surp <- res$algo.var$expl.path.surp
    algo.var$expl.path.fam <- res$algo.var$expl.path.fam
    algo.var$expl.path.multi <- res$algo.var$expl.path.multi

    #Save memory & model before the StratTourn evaluation
    idio.name <- paste0("opt.run.XGB.full.",my.strat)
    save.file.name <- paste(idio.name, format(Sys.time(), "%d-%b-%Y %H.%M"), "before.StratTourn", sep=" ")
    save(evaluator, algo.var, algo.par, game.object, model.par, file=save.file.name)

    # Init game
    game = make.pd.game(err.D.prob=0.15, delta=0.95)

    if(my.antistrat!="none"){
      strat.tourn = nlist(Model.strat.Main.real.Exp.Path,get(my.strat), get(my.antistrat))
    } else {
      strat.tourn = nlist(Model.strat.Main.real.Exp.Path,get(my.strat))
    }


    tourn = init.tournament(game=game, strat=strat.tourn)
    set.seed(234567)
    tourn = run.tournament(tourn=tourn, R = eval.no, T.max=rounds.no)
    r.limit <- get.matches.vs.matrix(tourn$dt)["Model.strat.Main.real.Exp.Path","get(my.strat)"]
    print(paste0("r.limit: ",r.limit))

    save.file.name <- paste(idio.name, format(Sys.time(), "%d-%b-%Y %H.%M"), sep=" ")

    #Save memory & model together with the tournament results
    save(evaluator, algo.var, algo.par, game.object, model.par, r.limit, tourn, file=save.file.name)

  }
}

disable.restore.points(TRUE)
enableJIT(3)
generate.best.strat(strat=strat, antistrat=antistrat, a.MC=a.MC, a.Q=a.Q, hybrid.decay=hybrid.decay, block.no=block.no, eval.no=eval.no, rounds.no=rounds.no, continue=continue, file.name=file.name)